5  Skin: Post-clustering comprehensive marker identification

5.1 Set up Seurat workspace

# Load libraries
library(data.table)
library(devtools)
library(presto)
library(glmGamPoi)
library(sctransform) 
library(Seurat)
library(tidyverse)
library(miQC)
library(SeuratWrappers)
library(flexmix)
library(SingleCellExperiment)
library(SummarizedExperiment)
library(readxl)
library(fishpond)
library(Matrix)
library(speckle)
library(scater)
library(patchwork)
library(vctrs)
library(alevinQC)
library(harmony)
library(scDblFinder)
library(cellXY)

# Set global options for Seurat v5 objects
options(Seurat.object.assay.version = 'v5')

5.2 Load previously saved object

merged.18279.skin.singlets <- readRDS("Skin_scRNA_Part3.rds")

5.3 Set idents to preferred initial clustering resolution

Idents(merged.18279.skin.singlets) <- merged.18279.skin.singlets$RNA_snn_res.0.8
merged.18279.skin.singlets$seurat_clusters <- merged.18279.skin.singlets$RNA_snn_res.0.8
DimPlot(merged.18279.skin.singlets, reduction="umap.harmony", label=TRUE)

5.4 Detect markers with FindAllMarkers

fam <- FindAllMarkers(merged.18279.skin.singlets,
                      assay="RNA",
                      slot="data",
                      only.pos = TRUE, 
                      logfc.threshold = 0.25)
Calculating cluster 0
Calculating cluster 1
Calculating cluster 2
Calculating cluster 3
Calculating cluster 4
Calculating cluster 5
Calculating cluster 6
Calculating cluster 7
Calculating cluster 8
Calculating cluster 9
Calculating cluster 10
Calculating cluster 11
Calculating cluster 12
Calculating cluster 13
Calculating cluster 14
Calculating cluster 15
Calculating cluster 16
Calculating cluster 17
Calculating cluster 18
Calculating cluster 19
Calculating cluster 20
Calculating cluster 21
Calculating cluster 22
Calculating cluster 23
Calculating cluster 24
Calculating cluster 25
Calculating cluster 26
head(fam)
         p_val avg_log2FC pct.1 pct.2 p_val_adj cluster     gene
S100A9       0   3.955395 0.896 0.126         0       0   S100A9
C5AR1        0   3.610838 0.895 0.130         0       0    C5AR1
APOBEC3A     0   4.327725 0.866 0.116         0       0 APOBEC3A
SERPINA1     0   3.751497 0.871 0.121         0       0 SERPINA1
IL1RN        0   3.784982 0.944 0.199         0       0    IL1RN
C15orf48     0   3.431901 0.887 0.147         0       0 C15orf48

5.5 Plot top markers per cluster as dotplot

fam_top <- fam %>%
    mutate(diff = pct.1 - pct.2) %>%
    dplyr::filter(pct.1 > 0.25 & diff > 0.1 & pct.2 < 0.1 & p_val_adj < 0.01) %>%
    group_by(cluster) %>%
    slice_head(n=10) %>%
    pull(gene) %>%
    unique()

# Compute aggregated expression values of these genes and cluster them to order the figure
rna <- AverageExpression(merged.18279.skin.singlets,assay="RNA",slot="data")
As of Seurat v5, we recommend using AggregateExpression to perform pseudo-bulk analysis.
First group.by variable `ident` starts with a number, appending `g` to ensure valid variable names
This message is displayed once per session.
rna.sub <- rna$RNA[fam_top,]
cors.genes <- as.dist(1-cor(as.matrix(t(rna.sub)),method="pearson"))
hc.genes <- hclust(cors.genes)
fam_top.sorted <- rownames(rna.sub)[hc.genes$order]

# Plot
DotPlot(merged.18279.skin.singlets,
        features = fam_top.sorted,
        assay = "RNA",
        cols=c("blue","red"),
        cluster.idents=T) + 
  RotatedAxis()

5.6 Subset to T/NK lineage and compute markers just among these clusters

merged.18279.skin.singlets.tnk <- subset(merged.18279.skin.singlets,
                                    subset = seurat_clusters %in% c(1,2,4,5,6,9,11,13,19))
fam.tnk <- FindAllMarkers(merged.18279.skin.singlets.tnk,
                      assay="RNA",
                      slot="data",
                      only.pos = T, 
                      logfc.threshold = 0.25)
Calculating cluster 1
Calculating cluster 2
Calculating cluster 4
Calculating cluster 5
Calculating cluster 6
Calculating cluster 9
Calculating cluster 11
Calculating cluster 13
Calculating cluster 19
head(fam.tnk)
       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster   gene
LTB        0  1.7198102 0.813 0.327         0       1    LTB
ICOS       0  1.6732385 0.724 0.257         0       1   ICOS
IL7R       0  1.3524027 0.853 0.402         0       1   IL7R
SPOCK2     0  1.3218146 0.832 0.397         0       1 SPOCK2
CD3E       0  0.9763243 0.888 0.474         0       1   CD3E
CD3D       0  0.9674333 0.773 0.364         0       1   CD3D

5.7 Plot top markers per cluster as dotplot

fam.tnk_top <- fam.tnk %>%
    mutate(diff = pct.1 - pct.2) %>%
    dplyr::filter(pct.1 > 0.25 & diff > 0.1 & pct.2 < 0.1 & p_val_adj < 0.01) %>%
    group_by(cluster) %>%
    slice_head(n=10) %>%
    pull(gene) %>%
    unique()

# Compute aggregated expression values of these genes and cluster them to order the figure
rna <- AverageExpression(merged.18279.skin.singlets,assay="RNA",slot="data")
rna.sub <- rna$RNA[fam.tnk_top,]
cors.genes <- as.dist(1-cor(as.matrix(t(rna.sub)),method="pearson"))
hc.genes <- hclust(cors.genes)
fam.tnk_top.sorted <- rownames(rna.sub)[hc.genes$order]

# Plot
DotPlot(merged.18279.skin.singlets,
        features = fam.tnk_top.sorted,
        assay = "RNA",
        cols=c("blue","red"),
        cluster.idents=T) + 
  RotatedAxis()

5.8 Get sessionInfo

sessionInfo()
R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)

Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so;  LAPACK version 3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: America/New_York
tzcode source: system (glibc)

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] cellXY_0.99.0               scDblFinder_1.14.0         
 [3] harmony_1.2.0               alevinQC_1.16.1            
 [5] vctrs_0.6.5                 patchwork_1.3.0            
 [7] scater_1.30.1               scuttle_1.12.0             
 [9] speckle_1.0.0               Matrix_1.6-4               
[11] fishpond_2.6.2              readxl_1.4.3               
[13] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0
[15] Biobase_2.62.0              GenomicRanges_1.54.1       
[17] GenomeInfoDb_1.38.8         IRanges_2.36.0             
[19] S4Vectors_0.40.2            BiocGenerics_0.48.1        
[21] MatrixGenerics_1.14.0       matrixStats_1.4.1          
[23] flexmix_2.3-19              lattice_0.22-6             
[25] SeuratWrappers_0.3.19       miQC_1.8.0                 
[27] lubridate_1.9.3             forcats_1.0.0              
[29] stringr_1.5.1               dplyr_1.1.4                
[31] purrr_1.0.2                 readr_2.1.5                
[33] tidyr_1.3.1                 tibble_3.2.1               
[35] ggplot2_3.5.1               tidyverse_2.0.0            
[37] Seurat_5.1.0                SeuratObject_5.0.2         
[39] sp_2.1-4                    sctransform_0.4.1          
[41] glmGamPoi_1.12.2            presto_1.0.0               
[43] Rcpp_1.0.13-1               devtools_2.4.5             
[45] usethis_3.0.0               data.table_1.16.2          

loaded via a namespace (and not attached):
  [1] fs_1.6.5                  spatstat.sparse_3.1-0    
  [3] bitops_1.0-9              httr_1.4.7               
  [5] RColorBrewer_1.1-3        profvis_0.4.0            
  [7] tools_4.3.2               utf8_1.2.4               
  [9] R6_2.5.1                  DT_0.33                  
 [11] lazyeval_0.2.2            uwot_0.2.2               
 [13] urlchecker_1.0.1          withr_3.0.2              
 [15] GGally_2.2.1              gridExtra_2.3            
 [17] progressr_0.15.1          cli_3.6.3                
 [19] spatstat.explore_3.2-6    fastDummies_1.7.3        
 [21] labeling_0.4.3            spatstat.data_3.1-4      
 [23] ggridges_0.5.6            pbapply_1.7-2            
 [25] Rsamtools_2.18.0          R.utils_2.12.3           
 [27] parallelly_1.39.0         sessioninfo_1.2.2        
 [29] limma_3.58.1              RSQLite_2.3.8            
 [31] BiocIO_1.12.0             generics_0.1.3           
 [33] gtools_3.9.5              ica_1.0-3                
 [35] spatstat.random_3.2-2     ggbeeswarm_0.7.2         
 [37] fansi_1.0.6               abind_1.4-8              
 [39] R.methodsS3_1.8.2         lifecycle_1.0.4          
 [41] yaml_2.3.10               edgeR_4.0.16             
 [43] SparseArray_1.2.2         Rtsne_0.17               
 [45] blob_1.2.4                grid_4.3.2               
 [47] dqrng_0.4.1               promises_1.3.0           
 [49] crayon_1.5.3              shinydashboard_0.7.2     
 [51] miniUI_0.1.1.1            beachmat_2.18.1          
 [53] cowplot_1.1.3             KEGGREST_1.42.0          
 [55] metapod_1.10.1            pillar_1.9.0             
 [57] knitr_1.45                rjson_0.2.23             
 [59] xgboost_1.7.8.1           future.apply_1.11.3      
 [61] codetools_0.2-20          leiden_0.4.3.1           
 [63] glue_1.8.0                remotes_2.5.0            
 [65] png_0.1-8                 spam_2.11-0              
 [67] org.Mm.eg.db_3.18.0       cellranger_1.1.0         
 [69] gtable_0.3.6              cachem_1.1.0             
 [71] xfun_0.49                 S4Arrays_1.2.0           
 [73] mime_0.12                 survival_3.7-0           
 [75] bluster_1.12.0            statmod_1.5.0            
 [77] ellipsis_0.3.2            fitdistrplus_1.2-1       
 [79] ROCR_1.0-11               nlme_3.1-166             
 [81] bit64_4.5.2               RcppAnnoy_0.0.22         
 [83] irlba_2.3.5.1             vipor_0.4.7              
 [85] KernSmooth_2.23-24        DBI_1.2.3                
 [87] colorspace_2.1-1          nnet_7.3-19              
 [89] tidyselect_1.2.1          bit_4.5.0                
 [91] compiler_4.3.2            BiocNeighbors_1.20.2     
 [93] DelayedArray_0.28.0       plotly_4.10.4            
 [95] rtracklayer_1.62.0        scales_1.3.0             
 [97] lmtest_0.9-40             digest_0.6.37            
 [99] goftest_1.2-3             spatstat.utils_3.1-1     
[101] rmarkdown_2.29            XVector_0.42.0           
[103] htmltools_0.5.8.1         pkgconfig_2.0.3          
[105] sparseMatrixStats_1.14.0  fastmap_1.2.0            
[107] rlang_1.1.4               htmlwidgets_1.6.4        
[109] shiny_1.9.1               DelayedMatrixStats_1.24.0
[111] farver_2.1.2              zoo_1.8-12               
[113] jsonlite_1.8.9            BiocParallel_1.36.0      
[115] R.oo_1.27.0               BiocSingular_1.18.0      
[117] RCurl_1.98-1.16           magrittr_2.0.3           
[119] modeltools_0.2-23         GenomeInfoDbData_1.2.11  
[121] dotCall64_1.2             munsell_0.5.1            
[123] viridis_0.6.5             reticulate_1.35.0        
[125] stringi_1.8.4             zlibbioc_1.48.2          
[127] MASS_7.3-60.0.1           org.Hs.eg.db_3.18.0      
[129] plyr_1.8.9                pkgbuild_1.4.5           
[131] ggstats_0.7.0             parallel_4.3.2           
[133] listenv_0.9.1             ggrepel_0.9.6            
[135] deldir_2.0-4              Biostrings_2.70.3        
[137] splines_4.3.2             tensor_1.5               
[139] hms_1.1.3                 locfit_1.5-9.10          
[141] igraph_2.1.1              spatstat.geom_3.2-8      
[143] RcppHNSW_0.6.0            reshape2_1.4.4           
[145] ScaledMatrix_1.10.0       pkgload_1.4.0            
[147] XML_3.99-0.17             evaluate_1.0.1           
[149] scran_1.30.2              BiocManager_1.30.25      
[151] tzdb_0.4.0                httpuv_1.6.15            
[153] RANN_2.6.2                polyclip_1.10-7          
[155] future_1.34.0             scattermore_1.2          
[157] rsvd_1.0.5                xtable_1.8-4             
[159] restfulr_0.0.15           svMisc_1.2.3             
[161] RSpectra_0.16-2           later_1.3.2              
[163] viridisLite_0.4.2         AnnotationDbi_1.64.1     
[165] GenomicAlignments_1.38.2  memoise_2.0.1            
[167] beeswarm_0.4.0            tximport_1.28.0          
[169] cluster_2.1.6             timechange_0.3.0         
[171] globals_0.16.3